Packages

knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

# Set workspace -----------------------------------------------------------
library(magrittr)
library(tidyverse)
library(reshape)
library(readxl)
library(limma)
library(edgeR)
library(biomaRt)
library(AnnotationHub)
library(GenomicRanges)
library(GenomicFeatures)
library(rtracklayer)
library(fgsea)
library(RUVSeq)
library(ngsReports)
library(metap)
library(harmonicmeanp)
library(ggpubr)
library(RColorBrewer)
library(grid)
library(gridExtra)
library(scales)
library(knitr)
library(kableExtra)
library(pheatmap)
library(ggrepel)
library(ggeasy)
library(ggfortify) # to allow ggplot2::autoplot() to work
library(pathview)
library(VennDiagram)
library(pander)
library(UpSetR)

~~ Introduction ~~

Here is my analysis for the sorl1 4-way, 6 month RNA-seq experiment. This analysis investigates the effects of the mutations V1482Afs, and R122Pfs (null) in sorl1 on 6 month old zebrafish brains. V1482Afs models the C1478* mutation in human SORL1 which was described in Pottier et al. (2012). R122Pfs is a putative null mutation which produces a premature stop codon before any of the coding sequences which encode the functional domains of the zebrafish Sorl1 protein. The family of zebrafish in this analysis arise from a pair-mating between fish of genotypes R122Pfs/+ x V1482Afs/+. Therefore, fish in the family have genotypes R122Pfs/+ (null/+), V1482Afs/+ (EOfAD-like), R122Pfs/V1482Afs (transheterozygous) and +/+ (non-mutant). The family was raised together in the same tanks to reduce environmental variation. Since the family contained ~100 fish, they were distributed over 3 tanks as to not stress them by overcrowding.

When the family was 6 months of age, 50 fish were sacrificed and had their brains removed. Genotyping was performed by PCR then 24 fish of equal genotypes and sex had brain total RNA extracted and DNase I treatment. Then the DNaseI treated total RNA was sent to SAHMRI for RNA-seq (polyA+, single ended, 75bp insert). The resulting fastq files were then processed using bash scripts to produce a counts matrix (kallisto was used to estimate transcript abundances). Kallisto settings were mean frag length = 300, sd = 60, 50 bootstraps. The index file used for the kallisto pseudo-alignment was generated from the zebrafish transcriptome according to the primary assembly of the GRCz11 reference (Ensembl release 96), with the sequences for unspliced transcripts additionally included.None of the sorl1 alternate transcripts were added to the transcriptome as sorl1 is a long gene, and kallisto couldnt tell the alternate transcripts apart (in a previous draft analysis not shown here),

Read in genomic ranges object and make annotation ojects

gr <- import.gff("ftp://ftp.ensembl.org/pub/release-96/gtf/danio_rerio/Danio_rerio.GRCz11.96.gtf.gz")

zfGeneNames <-
  gr %>% 
  mcols %>% 
  subset(type == "gene") %>% 
  as_tibble() %>% 
  dplyr::select(c(gene_id, gene_name)) 

tr2gn <- gr %>%
  subset(!is.na(transcript_id)) %>%
  subset(type == "transcript") %>%
  mcols() %>%
  as.data.frame() %>%
  dplyr::select(gene_id, transcript_id) %>%
  as_tibble()

# also need length and gc content
ah <- AnnotationHub() %>%
  subset(species == "Danio rerio") %>%
  subset(rdataclass == "EnsDb")

ensDb <- ah[["AH74989"]]

grTrans <- transcripts(ensDb)

trLengths <- exonsBy(ensDb, "tx") %>%
  width() %>%
  vapply(sum, integer(1))

mcols(grTrans)$length <- trLengths[names(grTrans)]


gcGene <- grTrans %>%
  mcols() %>%
  as.data.frame() %>%
  dplyr::select(gene_id, tx_id, gc_content, length) %>%
  as_tibble() %>%
  group_by(gene_id) %>%
  summarise(
    gc_content = sum(gc_content*length) / sum(length),
    length = ceiling(median(length))
  )

grGenes <- genes(ensDb)

mcols(grGenes) %<>%
  as.data.frame() %>%
  left_join(gcGene) %>%
  as.data.frame() %>%
  DataFrame()

Metadata of samples

This is the speadsheet containing all the info of the fish.

meta <- read_excel("meta_4way_6m_RNA-seq.xlsx") %>% 
  dplyr::filter(!is.na(Position)) %>% # filter out the ones i didnt use 
  dplyr::select(c(Sex, Genotype, Tank, Batch_killed, RNA_Seq_id, new_sample_id)) %>% 
  mutate(
    Sex = as.factor(Sex), 
    Genotype = as.factor(Genotype), 
    Tank = as.factor(Tank),
    Batch_killed = as.factor(Batch_killed)
    ) %>% 
  mutate(
    sample = RNA_Seq_id %>% 
      str_replace_all(pattern = "_", replacement = "-"), 
    Genotype_forPub = case_when(
      Genotype == "R122Pfs/+" ~ "null/+", 
      Genotype == "trans" ~ "transhet", 
      Genotype == "V1482Afs/+" ~ "V1482Afs/+", 
      Genotype == "WT" ~ "+/+"),
      Sex = case_when(
        Sex == "male" ~ "Male", 
        Sex == "fem" ~ "Female")
    )  %>% 
  mutate(Genotype_2 = as.factor(Genotype %>% 
                                  str_remove(pattern = "/\\+")
                                )
         )

Make the geneDGE and filter lowly expressed genes

Genes which are lowly expressed in RNA-seq data are not informative for differential gene expression analysis. Since one count in one RNA-seq library and 3 counts in another library does not necesserily mean a 3 fold change. Therefore we need to filter out any genes which are lowly expressed. A general rule for filtering is having a cpm above 10/(min_lib_size/1000000) in at least half of the RNA-seq libraries. (0.66 cpm).

# # Now can make the gene DGE object
counts <- list.files(path = here::here("kallisto_out/"), full.names = TRUE) %>%
  catchKallisto()
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//01-transhet-1_S1_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//02-V1482Afs-4_S2_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//03-WT-1_S3_merged_R1_001, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//04-R122Pfs-1_S4_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//05-transhet-4_S5_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//06-transhet-2_S6_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//07-WT-5_S7_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//08-WT-3_S8_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//09-transhet-3_S9_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//10-R122Pfs-4_S10_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//11-transhet-5_S11_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//12-V1482Afs-1_S12_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//13-R122Pfs-6_S13_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//14-V1482Afs-2_S14_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//15-WT-9_S15_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//16-R122Pfs-3_S16_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//17-V1482Afs-5_S17_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//18-R122Pfs-2_S18_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//19-transhet-6_S19_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//20-WT-6_S20_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//21-R122Pfs-5_S21_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//22-V1482Afs-6_S22_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//23-V1482Afs-3_S23_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
## Reading /Users/karissabarthelson/Documents/sorl1_4way/sorl1_4way_RNASeq_6m/kallisto_out//24-WT-4_S24_merged_R1_001_T1.fastq.gz, 83803 transcripts, 50 bootstraps
colnames(counts$counts) %<>%
  basename() %>%
  str_replace_all(pattern = "_S(.+)_merged_R1_001", "") %>%
  str_replace_all(pattern = "^(0|1|2).-", replacement = "") %>%
  str_replace_all(pattern = "_T1.fastq.gz", replacement = "")

counts$counts %>% 
  as.data.frame() %>% 
  rownames_to_column("ensembl_transcript_id") %>% 
  write_csv("catchKallisto_output.csv")

geneDGE <-
  counts$counts %>%
  as.data.frame() %>%
  rownames_to_column("transcript_id") %>%
  dplyr::filter(!grepl("unspliced", transcript_id)) %>%
  as_tibble() %>%
  mutate(
    transcript_id = str_remove_all(transcript_id, "\\.[0-9]+")
    ) %>%
  gather(key = "Sample", value = "Counts", 2:25) %>%
  left_join(tr2gn) %>%
  group_by(Sample, gene_id) %>%
  summarise(Counts = sum(Counts)) %>%
  spread(key = "Sample", value = "Counts") %>%
  column_to_rownames("gene_id") %>%
  DGEList() %>%
  calcNormFactors(method = "TMM")

geneDGE$samples %<>% 
  rownames_to_column("sample") %>% 
  left_join(meta, by = "sample") 

#Set the genotype WT as the reference level for DE analysis later
geneDGE$samples$Genotype <- relevel(geneDGE$samples$Genotype, "WT")

geneDGE$genes <-
  grGenes %>% 
  as_tibble() %>% 
  dplyr::select(gene_id, gene_id, gc_content, length, symbol, description) %>% 
  dplyr::filter(gene_id %in% rownames(geneDGE$counts)) %>% 
  arrange(gene_id) %>% 
  mutate(ensembl_gene_id = gene_id) %>% 
  column_to_rownames("ensembl_gene_id")

keepThesegenes <- (rowSums(cpm(geneDGE) > 0.66) >= 12) 

#before filtering
geneDGE %>% 
  cpm(log = TRUE) %>% 
  melt %>% 
  dplyr::filter(is.finite(value)) %>% 
  ggplot(aes(x = value, colour = X2)) +
  geom_density() + 
  guides(colour = FALSE) +
  ggtitle("Before filtering") +
  labs(x = "logCPM", y = "Proportion of Genes") +
  theme_bw()

#after filtering
geneDGE <- geneDGE[keepThesegenes,, keep.lib.sizes=FALSE]

geneDGE %>% 
  cpm(log = TRUE) %>% 
  melt %>% 
  dplyr::filter(is.finite(value)) %>% 
  ggplot(aes(x = value, colour = X2)) +
  geom_density() + 
  guides(colour = FALSE) +
  ggtitle("After filtering") +
  labs(x = "logCPM", y = "Proportion of Genes") +
  theme_bw()

library sizes

Library sizes afer filterling lowly expressed genes.

geneDGE$samples %>% 
  mutate(`Library size (millions)` = lib.size/1e6) %>% 
  ggplot(aes(new_sample_id, `Library size (millions)`, fill = Genotype_forPub)) +
  geom_col() +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 1) +
  xlab("") +
  easy_rotate_labels(which = "x", angle = 315) +
  ggsave("plots/libsize.png", height = 3, width = 3.5, scale = 1.2)

~~ Principle component analysis ~~

The overall similarities between samples can be visualised by principle component analysis (PCA). Some clustering of samples is observed, *i.e the V1482Afs and R122Pfs samples. However, the WT and transhets appear to be quite variable.

pca_raw <- geneDGE$counts %>% 
  cpm(log = T) %>% 
  t() %>% 
  prcomp()

pca_raw %>% 
  autoplot(
    data = tibble(sample = rownames(.$x)) %>%
      left_join(geneDGE$samples),
    colour = "Genotype_forPub", 
    shape = "Sex", 
    size = 4
  ) +
  geom_text_repel(
    aes(label = new_sample_id, colour = Genotype_forPub),
    show.legend = FALSE
  ) +
  theme_bw()

~~ DE analysis (TMM normalisation) ~~

Here, I will use the generalised linear model functionality of edgeR and likelihood ratio tests to look for DE genes.

Design matrix is shown below. WT is set as the intercept (i.e. the reference level).

#design with WT as the intercept
design <- model.matrix(~Genotype + Sex + Tank, data = geneDGE$samples) %>% 
  set_colnames(gsub("Genotype", "", colnames(.)))

design %>% 
  cbind(geneDGE$samples %>% dplyr::select(sample)) %>% 
  remove_rownames() %>% 
  column_to_rownames("sample") %>% 
  kable(caption = "Design matrix. a 1 designates that this coefficient is active") %>% 
  kable_styling()
Design matrix. a 1 designates that this coefficient is active
(Intercept) R122Pfs/+ trans V1482Afs/+ SexMale Tank2 Tank3
R122Pfs-1 1 1 0 0 1 1 0
R122Pfs-2 1 1 0 0 1 0 1
R122Pfs-3 1 0 0 0 1 1 0
R122Pfs-4 1 1 0 0 0 0 0
R122Pfs-5 1 0 0 0 0 0 0
R122Pfs-6 1 1 0 0 0 0 0
transhet-1 1 0 1 0 0 0 0
transhet-2 1 0 1 0 0 1 0
transhet-3 1 0 1 0 0 0 1
transhet-4 1 0 1 0 1 1 0
transhet-5 1 0 1 0 1 0 0
transhet-6 1 0 1 0 1 0 1
V1482Afs-1 1 0 0 1 1 0 0
V1482Afs-2 1 0 0 1 1 1 0
V1482Afs-3 1 0 0 1 1 0 1
V1482Afs-4 1 0 0 1 0 0 0
V1482Afs-5 1 0 0 1 0 1 0
V1482Afs-6 1 0 0 1 0 0 1
WT-1 1 0 0 0 0 0 0
WT-3 1 0 0 0 0 0 1
WT-4 1 0 0 0 1 1 0
WT-5 1 0 0 0 1 0 1
WT-6 1 0 0 0 1 0 0
WT-9 1 0 0 0 0 0 1

fit object

The code below fits the GLM (negative binomial variance function).

fit <- geneDGE %>% 
  estimateDisp(design) %>% 
  glmFit(design)

toptables

The code below performs the tests for differential expression using likelihood ratio tests from edgeR.

topTables_tmm <- design %>% colnames() %>% .[2:4] %>% 
   sapply(function(x){
    glmLRT(fit, coef = x) %>%
      topTags(n = Inf) %>%
      .[["table"]] %>%
      as_tibble() %>%
      arrange(PValue) %>%
       mutate(
        coef = x,
        DE = FDR < 0.05
      ) %>% 
      dplyr::select(
        symbol, logFC, logCPM, PValue, FDR, DE, everything()  
      )
      
  }, simplify = FALSE)

The tables below contain the DE genes in each comparison.

topTables_tmm$`V1482Afs/+` %>%
  dplyr::filter(DE == TRUE) %>% 
  kable(caption = "DE genes, V1482Afs/+ vs WT comparison") %>%
  kable_styling(bootstrap_options = c("striped", "hover")) 
DE genes, V1482Afs/+ vs WT comparison
symbol logFC logCPM PValue FDR DE gene_id gc_content length description LR coef
cox7a1 -2.590368 1.440764 0 2.3e-06 TRUE ENSDARG00000069464 38.64198 570 cytochrome c oxidase subunit 7A1 [Source:ZFIN;Acc:ZDB-GENE-060929-340] 41.23949 V1482Afs/+
topTables_tmm$`R122Pfs/+` %>%
  dplyr::filter(DE == TRUE) %>% 
  kable(caption = "DE genes, null/+ vs WT comparison") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
DE genes, null/+ vs WT comparison
symbol logFC logCPM PValue FDR DE gene_id gc_content length description LR coef
clca1 -9.282431 4.3371714 0.0e+00 0.0000000 TRUE ENSDARG00000016290 41.11958 3412 chloride channel accessory 1 [Source:ZFIN;Acc:ZDB-GENE-030131-6221] 59.77174 R122Pfs/+
plin2 -2.541092 2.5789109 0.0e+00 0.0000002 TRUE ENSDARG00000042332 48.52199 1306 perilipin 2 [Source:ZFIN;Acc:ZDB-GENE-050913-12] 44.88084 R122Pfs/+
pimr191 2.602969 1.7057146 0.0e+00 0.0000050 TRUE ENSDARG00000055683 49.24027 2106 Pim proto-oncogene, serine/threonine kinase, related 191 [Source:ZFIN;Acc:ZDB-GENE-131119-3] 37.56267 R122Pfs/+
itgae.1 -6.534700 1.5276286 0.0e+00 0.0000300 TRUE ENSDARG00000098012 38.42025 2608 integrin, alpha E, tandem duplicate 1 [Source:ZFIN;Acc:ZDB-GENE-131121-125] 33.50384 R122Pfs/+
CABZ01061592.2 -6.673181 0.7887449 9.8e-06 0.0331827 TRUE ENSDARG00000104631 40.63564 4405 NULL 19.54244 R122Pfs/+
topTables_tmm$trans %>%  
  dplyr::filter(DE == TRUE) %>% 
  kable(caption = "DE genes,transhet vs WT comparison") %>%
  kable_styling(bootstrap_options = c("striped", "hover")) 
DE genes,transhet vs WT comparison
symbol logFC logCPM PValue FDR DE gene_id gc_content length description LR coef
pimr191 2.9264454 1.705715 0.0e+00 0.0000000 TRUE ENSDARG00000055683 49.24027 2106 Pim proto-oncogene, serine/threonine kinase, related 191 [Source:ZFIN;Acc:ZDB-GENE-131119-3] 54.76235 trans
sorl1 -0.5620248 6.908258 0.0e+00 0.0000848 TRUE ENSDARG00000013892 49.25999 8581 sortilin-related receptor, L(DLR class) A repeats containing [Source:ZFIN;Acc:ZDB-GENE-050208-22] 32.83008 trans
rab4b 1.2364312 3.137467 5.0e-07 0.0023009 TRUE ENSDARG00000012177 38.37866 2970 RAB4B, member RAS oncogene family [Source:ZFIN;Acc:ZDB-GENE-040822-15] 25.37880 trans
ftr90 -2.3389031 2.146210 6.0e-07 0.0023009 TRUE ENSDARG00000097373 44.29315 2497 finTRIM family, member 90 [Source:ZFIN;Acc:ZDB-GENE-131122-80] 24.88251 trans
dnase2 -1.3971892 2.004203 7.0e-07 0.0023009 TRUE ENSDARG00000073893 39.93927 3129 deoxyribonuclease II, lysosomal [Source:ZFIN;Acc:ZDB-GENE-010724-16] 24.66467 trans
cox7a1 -1.8084452 1.440764 3.5e-06 0.0099195 TRUE ENSDARG00000069464 38.64198 570 cytochrome c oxidase subunit 7A1 [Source:ZFIN;Acc:ZDB-GENE-060929-340] 21.50480 trans

Visualisation

Below are some plots visualising the differential expression. #### MD plots

topTables_tmm %>%
  bind_rows() %>% 
  ggplot(aes(x = logCPM, y = logFC,  colour = DE)) +
  geom_point(
    alpha = 0.5
  ) +
  facet_grid(rows = vars(coef)) +
  theme_bw() +
  geom_label_repel(
    aes(label = symbol), 
    data = .  %>% dplyr::filter(FDR < 0.05), 
    show.legend = FALSE
    ) +
  theme(legend.position = "none") +
  ggtitle("MD Plots of different sorl1 mutant comparisons to WT with TMM normalisation") +
  scale_color_manual(values = c("grey50", "red"))

Volcano plots

topTables_tmm %>%
  bind_rows() %>% 
  ggplot(aes(y = -log10(PValue), x = logFC, colour = DE)) +
  geom_point(
    alpha = 0.5
  ) +
  facet_grid(rows = vars(coef)) +
  theme_bw() +
  geom_label_repel(
    aes(label = symbol), 
    data = .  %>% dplyr::filter(FDR < 0.05), 
    show.legend = FALSE
    ) +
  theme(legend.position = "none") +
  ggtitle("Volcano Plots of different sorl1 mutant comparisons to WT with TMM normalisation") +
  scale_color_manual(values = c("grey50", "red"))

check for GC and length bias

A ranking statistic of the sign of the logFC multiplied by the log10 of the p-value was calculated for each gene. I plotted this ranking statistic against the gc content and length of each gene to see if there are any biases observed. If there was, I would use the cqn normalisation method to correct for this. No apparent biases were observed, therefore cqn is not appropriate.

Note that the limits for the y axis (ranking stat) are zoomed in for visualisation purposes.

topTables_tmm %>% 
  bind_rows() %>% 
  mutate(rankstat = logFC*log10(PValue)) %>% 
  ggplot(aes(x = length, y = rankstat)) +
  geom_point(
    aes(colour = DE),
    alpha = 0.5
  ) +
  geom_smooth(se = FALSE, method = "gam") +
  facet_grid(rows = vars(coef)) +
  theme_bw() +
  theme(legend.position = "none") +
  scale_color_manual(values = c("grey50", "red")) +
  scale_x_log10()+
  coord_cartesian(ylim = c(-2.5, 2.5))

topTables_tmm %>% 
  bind_rows() %>% 
  mutate(rankstat = logFC*log10(PValue)) %>% 
  ggplot(aes(x = gc_content, y = rankstat)) +
  geom_point(
    aes(colour = DE),
    alpha = 0.5
  ) +
  geom_smooth(se = FALSE, method = "gam") +
  facet_grid(rows = vars(coef)) +
  theme_bw() +
  theme(legend.position = "none") +
  scale_color_manual(values = c("grey50", "red")) +
  coord_cartesian(ylim = c(-5,5))

~~ GSEAs of TMM normalised counts ~~

To get a more complete idea of the changes to gene expression in the mutant brains, I will perform some gene set testing. Fry from the limma package, performs the fast approximation of the ROAST method. The ROAST method is a self contained gene set testing method, which tests whether any of the gene sets contains DE genes. Instead of permutation tests (like GSEA, which assumes that genes are independent from each other, which is not true), it performs a rotation, a Monte Carlo technology for multivariate regression and more accurately will predict whether a gene set is changed.

Here, I will perform FRY on the HALLMARK and KEGG pathway gene sets which were downloaded from MSigDB (http://www.broadinstitute.org/msigdb) as a .gmt file with human entrez ids. These human entrez ids will be converted to zebrafish ensembl ids using a mapping file downloaded fro biomart. Some genes were found not to have a comlimentary zebrafish ensembl id, and this resulted in some of the KEGG pathway gene sets having only a small number of genes. Therefore, only KEGG pathway gene sets with more than 10 genes after conversion to zf ids were retained.

Read in gene sets

# need the zf2human gene mapping file from my W1818x RNA-seq analysis
zf2humangeneMapping <-  read_delim("gene_sets/zf2human_entrez.txt",delim =  ",") %>% 
    set_colnames(c("hu_gene_id", "hu_gene_name", "Entrez", "gene_id", "gene_name"))

# import KEGG gene sets
KEGG <- gmtPathways("gene_sets/c2.cp.kegg.v7.1.entrez.gmt")

KEGG %<>%   
  lapply(function(x){
    zf2humangeneMapping %>%
      dplyr::filter(Entrez %in% x, 
                    gene_id %in% rownames(geneDGE)) %>%
      .[["gene_id"]] %>% 
        unique()
  })

KEGG_sizes <- KEGG %>% 
  lapply(length) %>% 
  unlist %>% 
  as.data.frame() %>% 
  set_colnames( "n_genes") %>% 
  rownames_to_column("pathway")

# retain gene sets with at least 10 genes in it
KEGG <- KEGG[KEGG_sizes %>% dplyr::filter(n_genes > 10) %>% .$pathway]

# Hallmark gene sets

hallmark <- gmtPathways("gene_sets/h.all.v7.1.entrez.gmt") %>%
  lapply(function(x){
    zf2humangeneMapping %>%
      dplyr::filter(Entrez %in% x, 
                   gene_id %in% rownames(geneDGE)) %>%
      .[["gene_id"]]
  })

hallmark_sizes <- hallmark %>% 
  lapply(length) %>% 
  unlist %>% 
  as.data.frame() %>% 
  set_colnames( "n_genes") %>% 
  rownames_to_column("pathway")

fry

The code below will perform the fry method for each comparison. No significant gene sets were observed.

fryRes_TMM_kg <- 
  design %>% colnames() %>% .[2:4] %>% 
  sapply(function(x) {
    cpm(geneDGE$counts, log = T) %>% 
    fry(
      index = KEGG,
      design = design, 
      contrast = x, 
      sort = "directional"
    ) %>% 
    rownames_to_column("pathway") %>% 
    as_tibble() %>% 
      mutate(coef = x, 
             sig = FDR < 0.05 | FDR.Mixed < 0.05, 
             p_bonf = p.adjust(PValue, "bonf"))
  }, simplify = FALSE)

fryRes_TMM_hm <- 
  design %>% colnames() %>% .[2:4] %>% 
  sapply(function(x) {
    cpm(geneDGE$counts, log = T)  %>% 
    fry(
      index = hallmark,
      design = design, 
      contrast = x, 
      sort = "directional"
    ) %>% 
    rownames_to_column("pathway") %>% 
    as_tibble() %>% 
      mutate(coef = x, 
             sig = FDR < 0.05 | FDR.Mixed < 0.05, 
             p_bonf = p.adjust(PValue, "bonf"))
  }, simplify = FALSE)

fryRes_TMM_hm %>% 
  bind_rows() %>% 
  bind_rows(fryRes_TMM_kg %>% bind_rows()) %>% 
  dplyr::arrange(PValue) %>% 
  head(10) %>% 
  kable(caption = "top 10 most altered pathways. No pathways even approached being significantly altered.") %>% 
  kable_styling("basic")
top 10 most altered pathways. No pathways even approached being significantly altered.
pathway NGenes Direction PValue FDR PValue.Mixed FDR.Mixed coef sig p_bonf
KEGG_STARCH_AND_SUCROSE_METABOLISM 29 Down 0.0182654 0.9993872 0.6315558 0.9486453 R122Pfs/+ FALSE 1
KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION 44 Up 0.0447168 0.6854738 0.0932308 0.7578406 V1482Afs/+ FALSE 1
KEGG_TERPENOID_BACKBONE_BIOSYNTHESIS 13 Down 0.0538450 0.9993872 0.6083643 0.9486453 R122Pfs/+ FALSE 1
KEGG_AMINO_SUGAR_AND_NUCLEOTIDE_SUGAR_METABOLISM 40 Down 0.0542743 0.9993872 0.5688814 0.9486453 R122Pfs/+ FALSE 1
KEGG_RNA_POLYMERASE 25 Up 0.0600471 0.9292446 0.3377184 0.8338230 trans FALSE 1
KEGG_THYROID_CANCER 32 Down 0.0669904 0.9292446 0.4262375 0.8338230 trans FALSE 1
KEGG_MATURITY_ONSET_DIABETES_OF_THE_YOUNG 12 Up 0.0692501 0.6854738 0.3422673 0.7578406 V1482Afs/+ FALSE 1
KEGG_PRIMARY_BILE_ACID_BIOSYNTHESIS 18 Down 0.0729705 0.9993872 0.7813288 0.9486453 R122Pfs/+ FALSE 1
KEGG_STARCH_AND_SUCROSE_METABOLISM 29 Down 0.0792493 0.9292446 0.5248509 0.8338230 trans FALSE 1
KEGG_BASAL_CELL_CARCINOMA 58 Up 0.0823560 0.6854738 0.3407576 0.7578406 V1482Afs/+ FALSE 1

~~ RUVSeq to remove unwanted variation ~~

Since the largest source of variation in this dataset appeared to be due to library size, I can use the RUV method to remove this variation. RUV has 3 methods of estimating the unwanted variation:

• RUVg uses negative control genes, assumed to have constant expression across samples.

• RUVs uses centered (technical) replicate/negative control samples for which the covariates of interest are constant.

• RUVr uses residuals, e.g., from a first-pass GLM regression of the counts on the covariates of interest.

Here, I will use the RUVg method using negative control genes, defining the 5000 least DE genes from an ANOVA-like test for differential expression using edgeR. 5000 genes are what was used in the RUVSeq vignette.

RUVneg <- 
  geneDGE %>% 
  estimateDisp(design) %>%
  glmFit(design) %>%
  glmLRT(coef = 2:4) %>% # use all 3 sorl1 genotype coefs
  topTags(n = Inf, adjust.method = "fdr", sort.by = "p") %>% 
  .$table %>% 
  arrange(desc(PValue)) %>% 
  .[1:5000,] %>% 
  .$gene_id
### Perform the RUVg method with 1 factor of unwanted variation removed, 
RUV_k1 <- geneDGE$counts %>% 
  round %>% 
  RUVg(RUVneg, 1)

PCA

PCA analysis was repeated on the RUV normalised counts. Some seperation across PC1 was observed for WT and V1482Afs/+ samples (except for sample WT-4). The null/+ samples were seperated from the V1482Afs samples, but seem to be within the WT cluster, suggesting the null samples have a similar gene expression profile to the WTs. The transhets were fairly dispersed across PC1, with transhet1 being quite distant from the rest (likely an outlier) I will retain transhet-1 at this stage. But will keep it in mind when i do the GSEAs later and look at whether it may be driving an enrichment of a gene set.

pca_RUV_k1 <- RUV_k1$normalizedCounts %>% 
  as.data.frame() %>% 
    cpm(log = T) %>% 
    t() %>% 
    prcomp()

pca_RUV_k1 %>% 
  autoplot(
    data = tibble(sample = rownames(.$x)) %>%
      left_join(geneDGE$samples),
    colour = "Genotype_forPub", 
    shape = "Sex", 
    size = 4
  ) +
  geom_text_repel(
    aes(label = new_sample_id, colour = Genotype_forPub),
    show.legend = FALSE
  ) + 
  theme_bw() 

PC1 no longer depends on library size

pca_RUV_k1$x %>% 
  as.data.frame() %>% 
  rownames_to_column("sample") %>% 
  left_join(geneDGE$samples) %>% 
  mutate(lib.size = lib.size/1e6) %>% 
  ggplot(aes(PC1, lib.size)) +
  geom_point(
    aes(colour = Genotype_forPub), 
    size = 4
    ) +
  geom_text_repel(
    aes(label = new_sample_id, colour = Genotype_forPub), 
    show.legend = FALSE
    ) +
  theme_bw()

~~ DE analysis with RUV k1 ~~

I now will perform the DE analysis, including the W1 covariate in the model. To do this, I need to add in the W_1 value to the design matrix:

geneDGE$samples %<>% 
  cbind(RUV_k1$W)

design_RUVk1 <- 
  model.matrix(~Genotype + Sex + Tank + W_1, data = geneDGE$samples) %>% 
  set_colnames(gsub("Genotype", "", colnames(.)))

repeat the fit and toptables

fit_RUVk1 <- geneDGE %>% 
  estimateDisp(design_RUVk1) %>% 
  glmFit(design_RUVk1)

toptables

topTables_RUVk1 <- design_RUVk1 %>% colnames() %>% .[2:4] %>% 
   sapply(function(x){
    glmLRT(fit_RUVk1, coef = x) %>%
      topTags(n = Inf) %>%
      .[["table"]] %>%
      as_tibble() %>%
      arrange(PValue) %>%
       mutate(
        coef = x,
        DE = FDR < 0.05
      ) %>% 
      dplyr::select(
        symbol, logFC, logCPM, PValue, FDR, DE, everything()  
      )
      
  }, simplify = FALSE)

# # Make a copy of the Toptables for export
# topTables_RUVk1_forExport <- topTables_RUVk1
# 
# # Change the names so that it can be written out
# names(topTables_RUVk1_forExport) <- c("Null", "V1482Afs", "transhet")
# 
# writexl::write_xlsx(topTables_RUVk1_forExport, path = "DE_Res/Differential_Expression_Results_RUV_k1.xlsx")
# 
# # Remove the copy
# remove(topTables_RUVk1_forExport)

The tables show the DE genes in each comparison

topTables_RUVk1$`V1482Afs/+` %>%
  dplyr::filter(DE == TRUE) %>% 
  kable(caption = "DE genes, V1482Afs/+ vs WT comparison") %>%
  kable_styling(bootstrap_options = c("striped", "hover")) 
DE genes, V1482Afs/+ vs WT comparison
symbol logFC logCPM PValue FDR DE gene_id gc_content length description LR coef
cox7a1 -2.44458 1.430217 0 2e-07 TRUE ENSDARG00000069464 38.64198 570 cytochrome c oxidase subunit 7A1 [Source:ZFIN;Acc:ZDB-GENE-060929-340] 46.36492 V1482Afs/+
topTables_RUVk1$`R122Pfs/+` %>%
  dplyr::filter(DE == TRUE) %>% 
  kable(caption = "DE genes, null/+ vs WT comparison") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
DE genes, null/+ vs WT comparison
symbol logFC logCPM PValue FDR DE gene_id gc_content length description LR coef
clca1 -9.2261626 4.3361910 0.00e+00 0.0000000 TRUE ENSDARG00000016290 41.11958 3412 chloride channel accessory 1 [Source:ZFIN;Acc:ZDB-GENE-030131-6221] 67.43154 R122Pfs/+
plin2 -2.5378695 2.5800775 0.00e+00 0.0000000 TRUE ENSDARG00000042332 48.52199 1306 perilipin 2 [Source:ZFIN;Acc:ZDB-GENE-050913-12] 49.91796 R122Pfs/+
pimr191 2.5882277 1.6968427 0.00e+00 0.0000018 TRUE ENSDARG00000055683 49.24027 2106 Pim proto-oncogene, serine/threonine kinase, related 191 [Source:ZFIN;Acc:ZDB-GENE-131119-3] 39.52453 R122Pfs/+
itgae.1 -6.4329375 1.5490807 0.00e+00 0.0000035 TRUE ENSDARG00000098012 38.42025 2608 integrin, alpha E, tandem duplicate 1 [Source:ZFIN;Acc:ZDB-GENE-131121-125] 37.69974 R122Pfs/+
cuedc1b 1.6632437 3.7044499 0.00e+00 0.0001495 TRUE ENSDARG00000054748 50.21272 1855 CUE domain containing 1b [Source:ZFIN;Acc:ZDB-GENE-070424-29] 29.95017 R122Pfs/+
krt15 1.2285960 2.2445444 1.00e-07 0.0002202 TRUE ENSDARG00000036840 51.77994 1545 keratin 15 [Source:ZFIN;Acc:ZDB-GENE-040426-2931] 28.84666 R122Pfs/+
nme2b.2 1.3118024 3.1409487 1.10e-06 0.0025711 TRUE ENSDARG00000099420 56.35019 811 NME/NM23 nucleoside diphosphate kinase 2b, tandem duplicate 2 [Source:ZFIN;Acc:ZDB-GENE-000210-33] 23.80276 R122Pfs/+
vmp1 -0.3342902 5.0536939 1.50e-06 0.0028014 TRUE ENSDARG00000012450 44.06607 2060 vacuole membrane protein 1 [Source:ZFIN;Acc:ZDB-GENE-030131-8733] 23.17537 R122Pfs/+
ckmb 3.1408574 1.5788179 1.50e-06 0.0028014 TRUE ENSDARG00000040565 51.30344 1155 creatine kinase, muscle b [Source:ZFIN;Acc:ZDB-GENE-040426-2128] 23.15420 R122Pfs/+
pvalb4 2.5604268 1.4538921 2.00e-06 0.0034177 TRUE ENSDARG00000024433 43.28125 1280 parvalbumin 4 [Source:ZFIN;Acc:ZDB-GENE-040625-48] 22.56954 R122Pfs/+
CABZ01061592.2 -6.6443497 0.7894372 2.90e-06 0.0044594 TRUE ENSDARG00000104631 40.63564 4405 NULL 21.87571 R122Pfs/+
hsph1 -0.4412418 4.2977371 4.60e-06 0.0064980 TRUE ENSDARG00000019874 42.96747 2137 heat shock 105/110 protein 1 [Source:ZFIN;Acc:ZDB-GENE-120410-4] 20.98701 R122Pfs/+
mylpfa 2.5436823 1.0337324 1.73e-05 0.0224639 TRUE ENSDARG00000053254 47.27928 1388 myosin light chain, phosphorylatable, fast skeletal muscle a [Source:ZFIN;Acc:ZDB-GENE-990712-15] 18.46362 R122Pfs/+
ckma 2.3173129 0.9323376 3.00e-05 0.0361320 TRUE ENSDARG00000035327 51.07323 1584 creatine kinase, muscle a [Source:ZFIN;Acc:ZDB-GENE-980526-109] 17.41798 R122Pfs/+
actc1b 1.8400417 2.4912099 3.48e-05 0.0391573 TRUE ENSDARG00000099197 52.18688 1509 actin alpha cardiac muscle 1b [Source:ZFIN;Acc:ZDB-GENE-000322-1] 17.13410 R122Pfs/+
topTables_RUVk1$trans %>%
  dplyr::filter(DE == TRUE) %>% 
  kable(caption = "edgeR res, transhet vs WT comparison") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
edgeR res, transhet vs WT comparison
symbol logFC logCPM PValue FDR DE gene_id gc_content length description LR coef
pimr191 2.9119015 1.6968427 0.00e+00 0.0000000 TRUE ENSDARG00000055683 49.24027 2106 Pim proto-oncogene, serine/threonine kinase, related 191 [Source:ZFIN;Acc:ZDB-GENE-131119-3] 57.18526 trans
rab4b 1.3300170 3.1397941 0.00e+00 0.0000000 TRUE ENSDARG00000012177 38.37866 2970 RAB4B, member RAS oncogene family [Source:ZFIN;Acc:ZDB-GENE-040822-15] 49.26888 trans
sorl1 -0.5517887 6.9083682 0.00e+00 0.0000002 TRUE ENSDARG00000013892 49.25999 8581 sortilin-related receptor, L(DLR class) A repeats containing [Source:ZFIN;Acc:ZDB-GENE-050208-22] 43.63512 trans
cuedc1b 1.7283209 3.7044499 0.00e+00 0.0000071 TRUE ENSDARG00000054748 50.21272 1855 CUE domain containing 1b [Source:ZFIN;Acc:ZDB-GENE-070424-29] 36.29816 trans
dnase2 -1.3898502 2.0092372 0.00e+00 0.0000260 TRUE ENSDARG00000073893 39.93927 3129 deoxyribonuclease II, lysosomal [Source:ZFIN;Acc:ZDB-GENE-010724-16] 33.34816 trans
cox7a1 -1.7848139 1.4302172 1.00e-07 0.0003670 TRUE ENSDARG00000069464 38.64198 570 cytochrome c oxidase subunit 7A1 [Source:ZFIN;Acc:ZDB-GENE-060929-340] 27.59662 trans
ftr90 -2.3769685 2.1453573 2.00e-07 0.0003670 TRUE ENSDARG00000097373 44.29315 2497 finTRIM family, member 90 [Source:ZFIN;Acc:ZDB-GENE-131122-80] 27.55958 trans
kcnj13 -1.0064297 3.7393156 2.10e-06 0.0045039 TRUE ENSDARG00000043443 42.04939 4192 potassium inwardly-rectifying channel, subfamily J, member 13 [Source:ZFIN;Acc:ZDB-GENE-070129-1] 22.46804 trans
si:dkey-182i3.10 0.6066976 2.6173933 2.80e-06 0.0052135 TRUE ENSDARG00000096614 41.40127 2826 si:dkey-182i3.10 [Source:ZFIN;Acc:ZDB-GENE-121214-231] 21.96098 trans
mknk2b -0.5245210 6.0219074 3.90e-06 0.0066205 TRUE ENSDARG00000015164 46.77792 1053 MAPK interacting serine/threonine kinase 2b [Source:ZFIN;Acc:ZDB-GENE-030829-2] 21.30062 trans
si:dkey-182i3.9 -0.6433283 2.1609848 5.60e-06 0.0082224 TRUE ENSDARG00000096575 38.67394 4136 si:dkey-182i3.9 [Source:ZFIN;Acc:ZDB-GENE-121214-310] 20.60416 trans
si:ch211-281l24.3 -0.8305178 7.0830386 5.90e-06 0.0082224 TRUE ENSDARG00000068637 39.11385 2192 si:ch211-281l24.3 [Source:ZFIN;Acc:ZDB-GENE-131121-575] 20.53626 trans
unc119a 0.5912908 3.2184112 6.70e-06 0.0086379 TRUE ENSDARG00000034453 47.57945 603 unc-119 homolog a (C. elegans) [Source:ZFIN;Acc:ZDB-GENE-090930-2] 20.28874 trans
zgc:154093 -1.1473681 1.0915830 1.31e-05 0.0157194 TRUE ENSDARG00000055897 46.13977 2733 zgc:154093 [Source:ZFIN;Acc:ZDB-GENE-061103-517] 19.00311 trans
chrna10a 1.0368069 1.3416134 1.50e-05 0.0169005 TRUE ENSDARG00000011113 47.78426 1715 cholinergic receptor, nicotinic, alpha 10a [Source:ZFIN;Acc:ZDB-GENE-060503-725] 18.73331 trans
phykpl 1.0388267 1.2026550 1.77e-05 0.0186346 TRUE ENSDARG00000040566 43.61564 726 5-phosphohydroxy-L-lysine phospho-lyase [Source:ZFIN;Acc:ZDB-GENE-051127-33] 18.42408 trans
hsph1 -0.3522810 4.2977371 2.71e-05 0.0268768 TRUE ENSDARG00000019874 42.96747 2137 heat shock 105/110 protein 1 [Source:ZFIN;Acc:ZDB-GENE-120410-4] 17.61145 trans
aplp1 -0.2642627 10.7731620 3.22e-05 0.0302083 TRUE ENSDARG00000098368 45.28713 5973 amyloid beta (A4) precursor-like protein 1 [Source:ZFIN;Acc:ZDB-GENE-040319-1] 17.28067 trans
CABZ01061592.2 -4.2473766 0.7894372 4.49e-05 0.0398559 TRUE ENSDARG00000104631 40.63564 4405 NULL 16.65184 trans
alcamb -0.6117084 2.6676668 5.19e-05 0.0437511 TRUE ENSDARG00000058538 41.55296 3078 activated leukocyte cell adhesion molecule b [Source:ZFIN;Acc:ZDB-GENE-030131-1768] 16.37778 trans

Visualisation

MD plots

topTables_RUVk1 %>%
  bind_rows() %>% 
  ggplot(aes(x = logCPM, y = logFC,  colour = DE)) +
  geom_point(
    alpha = 0.5
  ) +
  facet_grid(rows = vars(coef)) +
  theme_bw() +
  geom_label_repel(
    aes(label = symbol), 
    data = .  %>% dplyr::filter(FDR < 0.05), 
    show.legend = FALSE
    ) +
  theme(legend.position = "none") +
  ggtitle("MD Plots of different sorl1 mutant comparisons to WT with RUV k=1") +
  scale_color_manual(values = c("grey50", "red"))

Volcano plots

topTables_RUVk1 %>%
  bind_rows() %>% 
  ggplot(aes(y = -log10(PValue), x = logFC, colour = DE)) +
  geom_point(
    alpha = 0.5
  ) +
  facet_grid(rows = vars(coef)) +
  theme_bw() +
  geom_label_repel(
    aes(label = symbol), 
    data = .  %>% dplyr::filter(FDR < 0.05), 
    show.legend = FALSE
    ) +
  theme(legend.position = "none") +
  ggtitle("Volcano Plots of different sorl1 mutant comparisons to WT with RUV (k = 1)") +
  scale_color_manual(values = c("grey50", "red"))

Many more genes were detected as DE now for the sorl1 null/+ and transhet samples. Statistical evidence was only observed for one DE gene in the EOfAD-like V1482Afs/+ mutant brains, cox7a1, a gene involved in the electron transport chain.

check for GC and length bias

Next, I will check whether there have been any GC or length biases introduced due to RUVSeq. I generated below the same kind of plots I did earlier in this analysis. No bias’s appear to be introduced as each of the “fit” lines are straight along 0. Note that these graphs are zoomed in so I can see whether there are any weak biases.

No biases were observed still. Which is good.

topTables_RUVk1 %>% 
  bind_rows() %>% 
  mutate(rankstat = logFC*log10(PValue)) %>% 
  ggplot(aes(x = length, y = rankstat)) +
  geom_point(
    aes(colour = DE),
    alpha = 0.5
  ) +
  geom_smooth(se = FALSE, method = "gam") +
  facet_grid(rows = vars(coef)) +
  theme_bw() +
  theme(legend.position = "none") +
  scale_color_manual(values = c("grey50", "red")) +
  scale_x_log10()+
  scale_y_continuous(limits = c(-2.5, 2.5))

topTables_RUVk1 %>% 
  bind_rows() %>% 
  mutate(rankstat = logFC*log10(PValue)) %>% 
  ggplot(aes(x = gc_content, y = rankstat)) +
  geom_point(
    aes(colour = DE),
    alpha = 0.5
  ) +
  geom_smooth(se = FALSE, method = "gam") +
  facet_grid(rows = vars(coef)) +
  theme_bw() +
  theme(legend.position = "none") +
  scale_color_manual(values = c("grey50", "red")) +
  scale_y_continuous(limits = c(-5,5))

~~ GSEAs with RUV (k = 1) ~~

Next, I want to see if as a group, genes tend to show significant changes.

fry

fryRes_RUVk1_kg <- 
  design_RUVk1 %>% colnames() %>% .[2:4] %>% 
  sapply(function(x) {
    geneDGE %>% 
      cpm(log = TRUE) %>% 
      fry(
        index = KEGG,
        design = design_RUVk1, 
        contrast = x, 
        sort = "directional"
      ) %>% 
      rownames_to_column("pathway") %>% 
      as_tibble() %>% 
      mutate(coef = x, 
             sig = FDR < 0.05 | FDR.Mixed < 0.05, 
             p_bonf = p.adjust(PValue, "bonf"))
  }, simplify = FALSE)

fryRes_RUVk1_hm <- 
  design_RUVk1 %>% colnames() %>% .[2:4] %>% 
  sapply(function(x) {
    geneDGE %>% 
      cpm(log = TRUE) %>% 
      fry(
        index = hallmark,
        design = design_RUVk1, 
        contrast = x, 
        sort = "directional"
      ) %>% 
      rownames_to_column("pathway") %>% 
      as_tibble() %>% 
      mutate(coef = x, 
             sig = FDR < 0.05 | FDR.Mixed < 0.05, 
             p_bonf = p.adjust(PValue, "bonf"))
  }, simplify = FALSE)

fryRes_RUVk1_hm %>% 
  bind_rows() %>% 
  bind_rows(fryRes_RUVk1_kg %>% bind_rows()) %>% 
  arrange(PValue.Mixed) %>% 
  head(20) %>% 
  kable(caption = "Most significant gene sets using fry (RUV data)") %>% 
  kable_styling("basic")
Most significant gene sets using fry (RUV data)
pathway NGenes Direction PValue FDR PValue.Mixed FDR.Mixed coef sig p_bonf
KEGG_OLFACTORY_TRANSDUCTION 23 Down 0.4790467 0.9240815 0.0039789 0.6963102 R122Pfs/+ FALSE 1
KEGG_DILATED_CARDIOMYOPATHY 87 Down 0.0910712 0.3622149 0.0048393 0.3301267 trans FALSE 1
KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION 44 Up 0.0977542 0.8288377 0.0077226 0.5281057 V1482Afs/+ FALSE 1
KEGG_HEMATOPOIETIC_CELL_LINEAGE 29 Up 0.1859407 0.8288377 0.0085066 0.5281057 V1482Afs/+ FALSE 1
HALLMARK_IL2_STAT5_SIGNALING 170 Down 0.0232968 0.2334531 0.0124745 0.3149356 trans FALSE 1
KEGG_VIRAL_MYOCARDITIS 40 Down 0.0067184 0.2575902 0.0128170 0.3301267 trans FALSE 1
KEGG_GLYCOLYSIS_GLUCONEOGENESIS 57 Down 0.0126542 0.7381629 0.0131621 0.5281057 V1482Afs/+ FALSE 1
KEGG_SYSTEMIC_LUPUS_ERYTHEMATOSUS 28 Down 0.9275119 0.9717188 0.0131871 0.5281057 V1482Afs/+ FALSE 1
KEGG_SELENOAMINO_ACID_METABOLISM 26 Down 0.2247719 0.5292857 0.0143613 0.3301267 trans FALSE 1
HALLMARK_MYC_TARGETS_V2 60 Down 0.9064259 0.9185165 0.0145385 0.3149356 trans FALSE 1
KEGG_PPAR_SIGNALING_PATHWAY 61 Up 0.1312984 0.8288377 0.0167056 0.5281057 V1482Afs/+ FALSE 1
KEGG_HYPERTROPHIC_CARDIOMYOPATHY_HCM 79 Down 0.1578463 0.4281959 0.0167987 0.3301267 trans FALSE 1
KEGG_FOCAL_ADHESION 199 Down 0.0088317 0.2575902 0.0182385 0.3301267 trans FALSE 1
KEGG_PURINE_METABOLISM 140 Up 0.9476591 0.9698266 0.0191120 0.3301267 trans FALSE 1
KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_GANGLIO_SERIES 16 Up 0.1634845 0.9240815 0.0192433 0.8493961 R122Pfs/+ FALSE 1
KEGG_PRION_DISEASES 25 Down 0.0068573 0.2575902 0.0211141 0.3301267 trans FALSE 1
HALLMARK_TNFA_SIGNALING_VIA_NFKB 164 Down 0.0420216 0.2334531 0.0211943 0.3149356 trans FALSE 1
KEGG_RETINOL_METABOLISM 29 Up 0.1875437 0.8288377 0.0263639 0.5281057 V1482Afs/+ FALSE 1
KEGG_FRUCTOSE_AND_MANNOSE_METABOLISM 41 Down 0.1479551 0.8288377 0.0293704 0.5281057 V1482Afs/+ FALSE 1
KEGG_GLYCINE_SERINE_AND_THREONINE_METABOLISM 25 Up 0.1978493 0.8288377 0.0296937 0.5281057 V1482Afs/+ FALSE 1

Still no significant pathways are observed. So I will try the “ensemble” method of GSEA, by performing 3 different methods of GSEA and combining raw p values by calculating their harmonic mean.

camera

CAMERA (Correlation Adjusted MEan RAnk) is a competitive gene set test which accounts for inter-gene correlation. It tests whether the genes in the set are highly ranked in terms of differential expression relative to genes not in the set.

No gene sets were signifcantly altered in this gene set test. Some of the top gene sets are shown below.

camera_res_KEGG_RUVk1 <- 
  design_RUVk1 %>% colnames() %>% .[2:4] %>% 
  sapply(function(x) {
    geneDGE %>% 
      cpm(log = TRUE) %>% 
      camera(
        index = KEGG,
        design = design_RUVk1, 
        contrast = x, 
        inter.gene.cor = NA, 
        sort = TRUE
      ) %>% 
      rownames_to_column("pathway") %>% 
      as_tibble() %>% 
      mutate(coef = x, 
             sig = FDR < 0.05)
  }, simplify = FALSE)

camera_res_hm_RUVk1 <- 
  design_RUVk1 %>% colnames() %>% .[2:4] %>% 
  sapply(function(x) {
    geneDGE %>% 
      cpm(log = TRUE) %>% 
      camera(
        index = hallmark,
        design = design_RUVk1, 
        contrast = x, 
        inter.gene.cor = NA, 
        sort = TRUE
      ) %>% 
      rownames_to_column("pathway") %>% 
      as_tibble() %>% 
      mutate(coef = x, 
             sig = FDR < 0.05)
  }, simplify = FALSE)

camera_res_KEGG_RUVk1 %>% 
  bind_rows() %>% 
  bind_rows(camera_res_hm_RUVk1 %>% bind_rows()) %>% 
  arrange(PValue) %>% 
  head(10) %>% 
  kable(caption = "Top 10 DE pathways using the camera algorithm") %>% 
  kable_styling() 
Top 10 DE pathways using the camera algorithm
pathway NGenes Correlation Direction PValue FDR coef sig
KEGG_STARCH_AND_SUCROSE_METABOLISM 29 -0.0046737 Down 0.0009368 0.1639340 R122Pfs/+ FALSE
KEGG_PYRUVATE_METABOLISM 38 0.0427810 Down 0.0052644 0.4606321 R122Pfs/+ FALSE
HALLMARK_MTORC1_SIGNALING 210 0.0210840 Down 0.0063255 0.3162751 R122Pfs/+ FALSE
KEGG_CELL_ADHESION_MOLECULES_CAMS 91 0.0015989 Up 0.0066042 0.9069268 V1482Afs/+ FALSE
KEGG_PRIMARY_BILE_ACID_BIOSYNTHESIS 18 0.0224078 Down 0.0152056 0.7133472 R122Pfs/+ FALSE
KEGG_GLYCOLYSIS_GLUCONEOGENESIS 57 0.0189784 Down 0.0165228 0.7133472 R122Pfs/+ FALSE
KEGG_MELANOMA 62 0.0149220 Down 0.0171354 0.8497136 trans FALSE
KEGG_GLYCOLYSIS_GLUCONEOGENESIS 57 0.0189784 Down 0.0193182 0.9069268 V1482Afs/+ FALSE
KEGG_VALINE_LEUCINE_AND_ISOLEUCINE_BIOSYNTHESIS 12 0.0207579 Down 0.0199945 0.9069268 V1482Afs/+ FALSE
KEGG_RNA_POLYMERASE 25 0.0697358 Up 0.0205439 0.8497136 trans FALSE

fgsea

Finally I will look at fgsea, This is the fast implementation of GSEA. All genes are ranked by differential expression (sign(logFC) x log10(PValue)), resulting in a ranked list starting from the most upregulated genes, ending in the most downregulated genes, and unchanged genes in the middle. The fgsea uses a Kolmogorov–Smirnov-like statistc called an enrichment score (ES), that represents the amount to which genes in a predefined geneset are overrepresented at either the start or end of the ranked list. fgsea then estimates the statistical significance of the ES. This calculation is done by a gene permutations in order to produce a null distribution for the ES. The P value is determined by comparison to the null distribution. Note that fgsea is quite naive in that it does not account for inter-gene correlation, therefore it will likely show false positives due to the nature of the permutations it performs.

# create a rank stat for fgsea
ranks_fgseaRUVk1 <- 
   sapply(topTables_RUVk1, function(x) {
     x %>% 
       mutate(PValue_withsign = sign(logFC) * log10(1/PValue)) %>% 
       arrange(PValue_withsign) %>% 
       dplyr::select(c("gene_id", "PValue_withsign")) %>% #only want the Pvalue with sign
       with(structure(PValue_withsign, names = gene_id)) %>% 
       rev() # reverse so the start of the list is upregulated genes
   }, simplify = FALSE)

# save ranks forsimulation
# ranks_fgseaRUVk1 %>% saveRDS("R_objects/ranksRUVSeq.rds")

# Run fgsea
# This takes a while due to the number of permutations
# set a seed for a reproducible result
set.seed(33)
fgseaRes_RUVk1_kg <- ranks_fgseaRUVk1 %>%
  lapply(function(x){
    fgseaMultilevel(stats = x, pathways = KEGG) %>%
      as_tibble() %>%
      dplyr::rename(FDR = padj) %>%
      mutate(padj = p.adjust(pval, "bonferroni")) %>%
      dplyr::select(pathway, pval, FDR, padj, everything()) %>%
      arrange(pval) %>%
      mutate(sig = padj < 0.05)
  })

fgseaRes_RUVk1_kg$`R122Pfs/+` %<>% 
  mutate(coef = "R122Pfs/+")

fgseaRes_RUVk1_kg$trans %<>% 
  mutate(coef = "trans")

fgseaRes_RUVk1_kg$`V1482Afs/+` %<>% 
  mutate(coef = "V1482Afs/+")

# Run fgsea
# This takes a while due to the number of permutations
set.seed(33)
fgseaRes_RUVk1_hm <- ranks_fgseaRUVk1 %>%
  lapply(function(x){
    fgseaMultilevel(stats = x, pathways = hallmark) %>%
      as_tibble() %>%
      dplyr::rename(FDR = padj) %>%
      mutate(padj = p.adjust(pval, "bonferroni")) %>%
      dplyr::select(pathway, pval, FDR, padj, everything()) %>%
      arrange(pval) %>%
      mutate(sig = padj < 0.05)
  })

fgseaRes_RUVk1_hm$`R122Pfs/+` %<>% 
  mutate(coef = "R122Pfs/+")

fgseaRes_RUVk1_hm$trans %<>% 
  mutate(coef = "trans")

fgseaRes_RUVk1_hm$`V1482Afs/+` %<>% 
  mutate(coef = "V1482Afs/+")

fgseaRes_RUVk1_hm %>% 
  bind_rows() %>% 
  bind_rows(fgseaRes_RUVk1_kg %>% 
              bind_rows()) %>% 
  dplyr::filter(sig == T) %>% 
  dplyr::select(-leadingEdge) %>% 
  kable(caption = "Significant KEGG & HALLMARK gene sets using the fgsea algorithm. Gene sets were considered signiicant if they had a Bonferroni-adjusted p value of < 0.05.") %>% 
  kable_styling()
Significant KEGG & HALLMARK gene sets using the fgsea algorithm. Gene sets were considered signiicant if they had a Bonferroni-adjusted p value of < 0.05.
pathway pval FDR padj log2err ES NES size sig coef
HALLMARK_OXIDATIVE_PHOSPHORYLATION 0.0000000 0.0000002 0.0000002 0.7614608 -0.5166880 -2.163813 225 TRUE R122Pfs/+
HALLMARK_MTORC1_SIGNALING 0.0000002 0.0000039 0.0000078 0.6901325 -0.4848398 -2.012731 210 TRUE R122Pfs/+
HALLMARK_IL2_STAT5_SIGNALING 0.0000121 0.0002024 0.0006072 0.5933255 -0.4767654 -1.915894 170 TRUE R122Pfs/+
HALLMARK_ADIPOGENESIS 0.0000206 0.0002575 0.0010299 0.5756103 -0.4400068 -1.816515 204 TRUE R122Pfs/+
HALLMARK_FATTY_ACID_METABOLISM 0.0000298 0.0002982 0.0014909 0.5756103 -0.4716552 -1.873050 158 TRUE R122Pfs/+
HALLMARK_TNFA_SIGNALING_VIA_NFKB 0.0000102 0.0005083 0.0005083 0.5933255 -0.4864199 -1.795507 164 TRUE trans
HALLMARK_IL2_STAT5_SIGNALING 0.0000470 0.0011747 0.0023494 0.5573322 -0.4599824 -1.704567 170 TRUE trans
HALLMARK_HEME_METABOLISM 0.0002376 0.0039607 0.0118820 0.5188481 -0.4327065 -1.608856 179 TRUE trans
HALLMARK_PI3K_AKT_MTOR_SIGNALING 0.0005738 0.0071719 0.0286876 0.4772708 -0.4758469 -1.662581 104 TRUE trans
HALLMARK_HYPOXIA 0.0009358 0.0093582 0.0467910 0.4772708 -0.4072651 -1.542389 208 TRUE trans
HALLMARK_OXIDATIVE_PHOSPHORYLATION 0.0000000 0.0000000 0.0000000 NA -0.5293729 -2.296493 225 TRUE V1482Afs/+
HALLMARK_MYC_TARGETS_V1 0.0000154 0.0003853 0.0007707 0.5756103 -0.3968557 -1.723529 219 TRUE V1482Afs/+
HALLMARK_ALLOGRAFT_REJECTION 0.0005121 0.0069740 0.0256044 0.4772708 0.4389593 1.670693 112 TRUE V1482Afs/+
HALLMARK_MTORC1_SIGNALING 0.0006946 0.0069740 0.0347296 0.4772708 -0.3497078 -1.514592 210 TRUE V1482Afs/+
HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION 0.0006974 0.0069740 0.0348700 0.4772708 0.3773206 1.557518 190 TRUE V1482Afs/+
KEGG_OXIDATIVE_PHOSPHORYLATION 0.0000012 0.0002126 0.0002126 0.6435518 -0.5487070 -2.102000 120 TRUE R122Pfs/+
KEGG_RIBOSOME 0.0000092 0.0008025 0.0016050 0.5933255 -0.6089914 -2.151997 71 TRUE R122Pfs/+
KEGG_PYRUVATE_METABOLISM 0.0000151 0.0008823 0.0026470 0.5933255 -0.7106757 -2.246468 38 TRUE R122Pfs/+
KEGG_PARKINSONS_DISEASE 0.0000663 0.0029003 0.0116010 0.5384341 -0.5098636 -1.954922 123 TRUE R122Pfs/+
KEGG_OXIDATIVE_PHOSPHORYLATION 0.0000010 0.0001791 0.0001791 0.6435518 0.5218836 2.084416 120 TRUE trans
KEGG_SPLICEOSOME 0.0001093 0.0082282 0.0191352 0.5384341 0.4346899 1.757224 130 TRUE trans
KEGG_MELANOMA 0.0001411 0.0082282 0.0246847 0.5188481 -0.5654522 -1.847517 62 TRUE trans
KEGG_PYRIMIDINE_METABOLISM 0.0002564 0.0100465 0.0448727 0.4984931 0.4750257 1.791782 80 TRUE trans
KEGG_OXIDATIVE_PHOSPHORYLATION 0.0000000 0.0000000 0.0000000 NA -0.6013872 -2.419244 120 TRUE V1482Afs/+
KEGG_RIBOSOME 0.0000000 0.0000030 0.0000059 0.7195128 0.6248822 2.258843 71 TRUE V1482Afs/+
KEGG_PARKINSONS_DISEASE 0.0000007 0.0000420 0.0001261 0.6594444 -0.5033531 -2.018534 123 TRUE V1482Afs/+
KEGG_CITRATE_CYCLE_TCA_CYCLE 0.0000022 0.0000955 0.0003821 0.6272567 -0.7102651 -2.302941 34 TRUE V1482Afs/+
KEGG_PROTEASOME 0.0000077 0.0002678 0.0013392 0.5933255 -0.6334235 -2.179186 47 TRUE V1482Afs/+
KEGG_ALZHEIMERS_DISEASE 0.0000349 0.0010170 0.0061022 0.5573322 -0.4281906 -1.772509 151 TRUE V1482Afs/+

harmonic p value

Significant gene sets were only observed in the transhet for fry (probably the most correct method with the best power). Some siginicant gene sets were observed for the V1482Afs and null samples in the fgsea algorithm after bonferroni adjustment (which is a very strict threshold), so there may be something there.

Next, i will perform a meta analysis of the GSEAs by calculating the harmonic mean p value. This method corrects for the strong-sense family-wise error rate, and states whether groups of dependent p values are statistically significant.

harmonicPvals_hm <- 
  fryRes_RUVk1_hm %>% 
  bind_rows() %>% 
  dplyr::select(pathway, PValue.Mixed, coef) %>% 
  dplyr::rename(fry_p = PValue.Mixed) %>% 
  left_join(camera_res_hm_RUVk1 %>% 
              bind_rows() %>% 
              dplyr::select(pathway, PValue, coef), 
            by = c("pathway", "coef")) %>% 
  dplyr::rename(camera_p = PValue) %>% 
  left_join(fgseaRes_RUVk1_hm %>% 
              bind_rows() %>% 
              dplyr::select(pathway, pval, coef), 
            by = c("pathway", "coef")) %>% 
  dplyr::rename(fgsea_p = pval) %>% 
  bind_rows() %>% 
  nest(p = one_of(c("fry_p", "camera_p", "fgsea_p"))) %>% 
 mutate(harmonic_p = vapply(p, function(x){
        x <- unlist(x)
        x <- x[!is.na(x)]
        p.hmp(x, L = 4)
      }, numeric(1)), 
       wilkinson_p = vapply(p, function(x){
        x <- unlist(x)
        x <- x[!is.na(x)]
        wilkinsonp(x)$p
      }, numeric(1)), 
      wilkinson_p_2 = vapply(p, function(x){
        x <- unlist(x)
        x <- x[!is.na(x)]
        wilkinsonp(x, r = 2)$p
      }, numeric(1)), 
       wilkinson_p_3 = vapply(p, function(x){
        x <- unlist(x)
        x <- x[!is.na(x)]
        wilkinsonp(x, r = 3)$p
      }, numeric(1))
    ) %>% 
  unnest() %>% 
  mutate(harmonic_p_FDR = p.adjust(harmonic_p, "fdr"), 
         wilk_p_FDR = p.adjust(wilkinson_p, "fdr"),
         wilk_p_2_FDR = p.adjust(wilkinson_p_2, "fdr"),
         wilk_p_3_FDR = p.adjust(wilkinson_p_3, "fdr"),
         sig = harmonic_p_FDR < 0.05) %>% 
  arrange(wilkinson_p_2) 

harmonicPvals_kg <- 
  fryRes_RUVk1_kg%>% 
  bind_rows() %>% 
  dplyr::select(pathway, PValue.Mixed, coef) %>% 
  dplyr::rename(fry_p = PValue.Mixed) %>% 
  left_join(camera_res_KEGG_RUVk1 %>% 
              bind_rows() %>% 
              dplyr::select(pathway, PValue, coef), 
            by = c("pathway", "coef")) %>% 
  dplyr::rename(camera_p = PValue) %>% 
  left_join(fgseaRes_RUVk1_kg %>% 
              bind_rows() %>% 
              dplyr::select(pathway, pval, coef), 
            by = c("pathway", "coef")) %>% 
  dplyr::rename(fgsea_p = pval) %>% 
  bind_rows() %>% 
  nest(p = one_of(c("fry_p", "camera_p", "fgsea_p"))) %>% 
 mutate(harmonic_p = vapply(p, function(x){
        x <- unlist(x)
        x <- x[!is.na(x)]
        p.hmp(x, L = 4)
      }, numeric(1)), 
       wilkinson_p = vapply(p, function(x){
        x <- unlist(x)
        x <- x[!is.na(x)]
        wilkinsonp(x)$p
      }, numeric(1)), 
      wilkinson_p_2 = vapply(p, function(x){
        x <- unlist(x)
        x <- x[!is.na(x)]
        wilkinsonp(x, r = 2)$p
      }, numeric(1)), 
       wilkinson_p_3 = vapply(p, function(x){
        x <- unlist(x)
        x <- x[!is.na(x)]
        wilkinsonp(x, r = 3)$p
      }, numeric(1))
    ) %>% 
  unnest() %>% 
  mutate(harmonic_p_FDR = p.adjust(harmonic_p, "fdr"), 
         wilk_p_FDR = p.adjust(wilkinson_p, "fdr"),
         wilk_p_2_FDR = p.adjust(wilkinson_p_2, "fdr"),
         wilk_p_3_FDR = p.adjust(wilkinson_p_3, "fdr"),
         sig = harmonic_p_FDR < 0.05) %>% 
  arrange(wilkinson_p_2) 

harmonicPvals_hm %>% 
  bind_rows(harmonicPvals_kg) %>% 
  dplyr::filter(sig == TRUE & coef == "V1482Afs/+") %>% 
  kable(caption = "Significant gene sets in V1482Afs/+ brains (harmonic mean p FDR adjusted < 0.05)") %>% 
  kable_styling()
Significant gene sets in V1482Afs/+ brains (harmonic mean p FDR adjusted < 0.05)
pathway coef fry_p camera_p fgsea_p harmonic_p wilkinson_p wilkinson_p_2 wilkinson_p_3 harmonic_p_FDR wilk_p_FDR wilk_p_2_FDR wilk_p_3_FDR sig
HALLMARK_MTORC1_SIGNALING V1482Afs/+ 0.0427354 0.1052653 0.0006946 0.0020782 0.0020823 0.0053229 0.0011664 0.0227777 0.0224008 0.1144951 0.0510597 TRUE
HALLMARK_APICAL_JUNCTION V1482Afs/+ 0.1624615 0.0488034 0.0016904 0.0050611 0.0050626 0.0069128 0.0042880 0.0446570 0.0446703 0.1152142 0.0558904 TRUE
HALLMARK_OXIDATIVE_PHOSPHORYLATION V1482Afs/+ 0.1115355 0.1691985 0.0000000 0.0000000 0.0000000 0.0345455 0.0048438 0.0000000 0.0000000 0.1851435 0.0558904 TRUE
HALLMARK_ALLOGRAFT_REJECTION V1482Afs/+ 0.1148330 0.1169782 0.0005121 0.0015466 0.0015355 0.0365313 0.0016007 0.0210895 0.0209383 0.1851435 0.0510597 TRUE
HALLMARK_MYC_TARGETS_V1 V1482Afs/+ 0.5039341 0.2243305 0.0000154 0.0000463 0.0000462 0.1283940 0.1279738 0.0011567 0.0011560 0.3106307 0.2841949 TRUE
HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION V1482Afs/+ 0.3526774 0.2939392 0.0006974 0.0021259 0.0020907 0.2084079 0.0438665 0.0227777 0.0224008 0.4083261 0.1848843 TRUE
KEGG_CELL_ADHESION_MOLECULES_CAMS V1482Afs/+ 0.1061212 0.0066042 0.0003867 0.0011048 0.0011595 0.0001303 0.0011951 0.0341179 0.0351487 0.0204800 0.0330227 TRUE
KEGG_GLYCOLYSIS_GLUCONEOGENESIS V1482Afs/+ 0.0131621 0.0193182 0.0005370 0.0015308 0.0016100 0.0005152 0.0000072 0.0414823 0.0419055 0.0450765 0.0037849 TRUE
KEGG_CITRATE_CYCLE_TCA_CYCLE V1482Afs/+ 0.0322982 0.0470769 0.0000022 0.0000065 0.0000065 0.0030621 0.0001043 0.0005731 0.0005731 0.0937173 0.0152851 TRUE
KEGG_ADHERENS_JUNCTION V1482Afs/+ 0.0784379 0.0468924 0.0005278 0.0015803 0.0015827 0.0063905 0.0004826 0.0414823 0.0419055 0.1266652 0.0211133 TRUE
KEGG_PROTEASOME V1482Afs/+ 0.3299721 0.1723537 0.0000077 0.0000230 0.0000230 0.0788776 0.0359279 0.0017224 0.0017218 0.3022680 0.1598486 TRUE
KEGG_OXIDATIVE_PHOSPHORYLATION V1482Afs/+ 0.2430565 0.2458999 0.0000000 0.0000000 0.0000000 0.1485116 0.0148688 0.0000002 0.0000002 0.4277462 0.1075328 TRUE
KEGG_ALZHEIMERS_DISEASE V1482Afs/+ 0.3230226 0.2860488 0.0000349 0.0001047 0.0001046 0.1986604 0.0337053 0.0054986 0.0054918 0.4919657 0.1538722 TRUE
KEGG_PARKINSONS_DISEASE V1482Afs/+ 0.3299820 0.3368124 0.0000007 0.0000022 0.0000022 0.2548021 0.0382089 0.0003782 0.0003782 0.5560050 0.1644273 TRUE
KEGG_RIBOSOME V1482Afs/+ 0.4771629 0.4687473 0.0000000 0.0000001 0.0000001 0.4531820 0.1086425 0.0000266 0.0000266 0.7556810 0.2910068 TRUE
harmonicPvals_hm %>% 
  bind_rows(harmonicPvals_kg) %>% 
  dplyr::filter(sig == TRUE & coef == "R122Pfs/+") %>% 
  kable(caption = "Significant gene sets in null/+ brains (harmonic mean p FDR adjusted < 0.05)") %>% 
  kable_styling()
Significant gene sets in null/+ brains (harmonic mean p FDR adjusted < 0.05)
pathway coef fry_p camera_p fgsea_p harmonic_p wilkinson_p wilkinson_p_2 wilkinson_p_3 harmonic_p_FDR wilk_p_FDR wilk_p_2_FDR wilk_p_3_FDR sig
HALLMARK_MTORC1_SIGNALING R122Pfs/+ 0.2661002 0.0063255 0.0000002 0.0000005 0.0000005 0.0001195 0.0188424 0.0000233 0.0000233 0.0179295 0.1284708 TRUE
HALLMARK_IL2_STAT5_SIGNALING R122Pfs/+ 0.1338565 0.1326548 0.0000121 0.0000364 0.0000364 0.0481231 0.0023984 0.0010934 0.0010930 0.1950939 0.0510597 TRUE
HALLMARK_FATTY_ACID_METABOLISM R122Pfs/+ 0.1683572 0.1419083 0.0000298 0.0000895 0.0000895 0.0546984 0.0047719 0.0016787 0.0016772 0.2016982 0.0558904 TRUE
HALLMARK_ADIPOGENESIS R122Pfs/+ 0.4622099 0.2215037 0.0000206 0.0000618 0.0000618 0.1254560 0.0987456 0.0013251 0.0013241 0.3084984 0.2553765 TRUE
HALLMARK_XENOBIOTIC_METABOLISM R122Pfs/+ 0.3596106 0.2452983 0.0013260 0.0040835 0.0039727 0.1509940 0.0465048 0.0382830 0.0372445 0.3484477 0.1885328 TRUE
HALLMARK_OXIDATIVE_PHOSPHORYLATION R122Pfs/+ 0.5380352 0.2534905 0.0000000 0.0000000 0.0000000 0.1601950 0.1557514 0.0000011 0.0000011 0.3639867 0.3066466 TRUE
KEGG_PYRUVATE_METABOLISM R122Pfs/+ 0.0614810 0.0052644 0.0000151 0.0000453 0.0000454 0.0000828 0.0002324 0.0026405 0.0026469 0.0204800 0.0152851 TRUE
KEGG_OXIDATIVE_PHOSPHORYLATION R122Pfs/+ 0.5945218 0.3316675 0.0000012 0.0000036 0.0000036 0.2570409 0.2101374 0.0003827 0.0003827 0.5560050 0.4178869 TRUE
KEGG_PARKINSONS_DISEASE R122Pfs/+ 0.6136803 0.3546357 0.0000663 0.0001993 0.0001989 0.2880969 0.2311142 0.0095132 0.0094911 0.5954759 0.4428283 TRUE
KEGG_RIBOSOME R122Pfs/+ 0.5016710 0.4576009 0.0000092 0.0000275 0.0000275 0.4365538 0.1262574 0.0018063 0.0018056 0.7441258 0.3099307 TRUE
harmonicPvals_hm %>% 
  bind_rows(harmonicPvals_kg) %>% 
  dplyr::filter(sig == TRUE & coef == "trans") %>% 
  kable(caption = "Significant gene sets in trans brains (harmonic mean p FDR adjusted < 0.05)") %>% 
  kable_styling()
Significant gene sets in trans brains (harmonic mean p FDR adjusted < 0.05)
pathway coef fry_p camera_p fgsea_p harmonic_p wilkinson_p wilkinson_p_2 wilkinson_p_3 harmonic_p_FDR wilk_p_FDR wilk_p_2_FDR wilk_p_3_FDR sig
HALLMARK_IL2_STAT5_SIGNALING trans 0.0124745 0.0907546 0.0000470 0.0001406 0.0001410 0.0004630 0.0007475 0.0023438 0.0023493 0.0347218 0.0510597 TRUE
HALLMARK_TNFA_SIGNALING_VIA_NFKB trans 0.0211943 0.1318845 0.0000102 0.0000305 0.0000305 0.0013286 0.0022939 0.0010934 0.0010930 0.0664276 0.0510597 TRUE
HALLMARK_PI3K_AKT_MTOR_SIGNALING trans 0.0658323 0.1476994 0.0005738 0.0017290 0.0017203 0.0124310 0.0032221 0.0216130 0.0215033 0.1245680 0.0532970 TRUE
HALLMARK_HYPOXIA trans 0.0720119 0.1240488 0.0009358 0.0028230 0.0028048 0.0148103 0.0019089 0.0282301 0.0280484 0.1306787 0.0510597 TRUE
HALLMARK_HEME_METABOLISM trans 0.0742018 0.1396451 0.0002376 0.0007151 0.0007128 0.0157006 0.0027232 0.0107262 0.0106913 0.1308385 0.0510597 TRUE
KEGG_MELANOMA trans 0.0988569 0.0171354 0.0001411 0.0004212 0.0004231 0.0008708 0.0009661 0.0170102 0.0170870 0.0507966 0.0330227 TRUE
KEGG_INSULIN_SIGNALING_PATHWAY trans 0.0518222 0.2926678 0.0003398 0.0010225 0.0010190 0.0077783 0.0250683 0.0341179 0.0341479 0.1266652 0.1361045 TRUE
KEGG_PANCREATIC_CANCER trans 0.0582801 0.1045314 0.0006521 0.0019595 0.0019550 0.0097938 0.0011422 0.0439394 0.0433922 0.1380552 0.0330227 TRUE
KEGG_MAPK_SIGNALING_PATHWAY trans 0.0763288 0.3930536 0.0006070 0.0018366 0.0018199 0.0165889 0.0607233 0.0438274 0.0433922 0.1527451 0.2189536 TRUE
KEGG_REGULATION_OF_ACTIN_CYTOSKELETON trans 0.1054149 0.0771078 0.0003470 0.0010445 0.0010407 0.0169199 0.0011714 0.0341179 0.0341479 0.1527451 0.0330227 TRUE
KEGG_SPLICEOSOME trans 0.1219252 0.0948070 0.0001093 0.0003287 0.0003280 0.0252608 0.0018125 0.0143793 0.0143499 0.1816702 0.0432531 TRUE
KEGG_PYRIMIDINE_METABOLISM trans 0.1238372 0.1490330 0.0002564 0.0007729 0.0007690 0.0422087 0.0033101 0.0289824 0.0288393 0.2475084 0.0599250 TRUE
KEGG_ENDOCYTOSIS trans 0.1385912 0.2405890 0.0006617 0.0020087 0.0019836 0.0522986 0.0139260 0.0439394 0.0433922 0.2590261 0.1029741 TRUE
KEGG_MTOR_SIGNALING_PATHWAY trans 0.1716417 0.2378487 0.0004019 0.0012159 0.0012051 0.0782692 0.0134556 0.0354650 0.0351487 0.3021422 0.1009168 TRUE
KEGG_OXIDATIVE_PHOSPHORYLATION trans 0.5334366 0.3863112 0.0000010 0.0000031 0.0000031 0.3324057 0.1517918 0.0003827 0.0003827 0.6322934 0.3391094 TRUE
KEGG_HUNTINGTONS_DISEASE trans 0.5234449 0.4240937 0.0005591 0.0017015 0.0016762 0.3870153 0.1434210 0.0425379 0.0419055 0.6911793 0.3346490 TRUE

Visualisation

dotplot summary

sigpaths <- 
  harmonicPvals_hm %>% 
  bind_rows(harmonicPvals_kg %>% bind_rows()) %>% 
  dplyr::filter(wilk_p_FDR < 0.05) %>% 
  .$pathway

harmonicPvals_hm %>% 
  bind_rows(harmonicPvals_kg %>% bind_rows()) %>% 
  dplyr::filter(pathway %in% sigpaths) %>% 
  ggplot(aes(coef, pathway, size =-log10(wilkinson_p))) +
  geom_point(aes(colour = sig)) +
  geom_text(
    aes(label = harmonic_p_FDR %>% signif(1)), 
    size = 2.5, 
    vjust = 2.5
  ) +
  labs(colour = "harmonic mean p < 0.05 \nafter FDR adjustment")+
  ggpubr::theme_pubclean()+
  theme(legend.position = "right")+
  easy_rotate_labels(which = "x", angle =315) +
  scale_color_manual(values = c("grey30", "turquoise3")) 

  # ggsave("plots/dotplot_RUVk1_HMandKG_harmonicP.png", width = 20, height = 30, units = "cm", dpi = 300)

sig_in <- harmonicPvals_hm %>%
  bind_rows(harmonicPvals_kg %>% bind_rows()) %>%
  dplyr::filter(pathway %in% sigpaths) %>%
  dplyr::select(pathway, harmonic_p_FDR, coef) %>%
  spread(key = "coef", value = "harmonic_p_FDR") %>%
  mutate(sig_in = case_when(
    `R122Pfs/+` < 0.05 & trans < 0.05 & `V1482Afs/+` < 0.05 ~ "all",
    `R122Pfs/+` > 0.05 & trans < 0.05 & `V1482Afs/+` < 0.05 ~ "trans&EOfAD",
    `R122Pfs/+` < 0.05 & trans > 0.05 & `V1482Afs/+` < 0.05 ~ "null&EOfAD",
    `R122Pfs/+` < 0.05 & trans < 0.05 & `V1482Afs/+` > 0.05 ~ "null&trans",
    `R122Pfs/+` < 0.05 & trans > 0.05 & `V1482Afs/+` > 0.05 ~ "null only",
    `R122Pfs/+` > 0.05 & trans < 0.05 & `V1482Afs/+` > 0.05 ~ "transOnly",
    `R122Pfs/+` > 0.05 & trans > 0.05 & `V1482Afs/+` < 0.05 ~ "eofAD only",
  )) %>%
  dplyr::select(pathway, sig_in)

harmonicPvals_hm %>%
  bind_rows(harmonicPvals_kg %>% bind_rows()) %>%
  dplyr::filter(pathway %in% sigpaths) %>%
  left_join(sig_in) %>%
  ggplot(aes(coef, pathway, size =-log10(harmonic_p))) +
  geom_point(aes(colour = sig)) +
  geom_text(
    aes(label = harmonic_p_FDR %>% signif(1)),
    size = 2.5,
    vjust = 2.5
  ) +
  labs(colour = "harmonic mean p < 0.05 \nafter FDR adjustment")+
  ggpubr::theme_pubclean()+
  theme(legend.position = "right")+
  easy_rotate_labels(which = "x", angle =315) +
  facet_wrap(~sig_in, scales = "free", strip.position = "right") +
  scale_color_manual(values = c("grey30", "turquoise3"))

  # ggsave("plots/dotplotbySignifin.png", width = 40, units = "cm")

~~ GSEA IRE with RUV k = 1 ~~

Nhi Hin, a PhD candidate in our lab, recently developed a method for testing for iron dyshomeostasis in RNA-seq data. (see https://www.biorxiv.org/content/10.1101/2020.05.01.071498v2 for the preprint article). I will use a modified version of this analysis, where i will use the RUV adjusted counts as input rather than a limma voom object as input for the GSEAs on the IRE gene sets. Also, rather than combining the raw p-values using Wilkinsons method (which assumes that each p value from each of the methods of GSEA are independent), i will use the harmonic mean p method, which does not have this assumption.

fry, camera, and fgsea are the GSEA algorithms,

import the gene sets

zebrafishIreGenes <- readRDS("gene_sets/zebrafishIreGenes.rds")

GSEAs

Perform the individual tests

fry_ire_RUVk1 <- 
  design_RUVk1 %>% colnames() %>% .[2:4] %>% 
  sapply(function(x) {
   geneDGE %>% 
      cpm(log = T) %>% 
      fry(
        index = zebrafishIreGenes,
        design = design_RUVk1, 
        contrast = x, 
        sort = "directional"
      ) %>% 
      rownames_to_column("pathway") %>% 
      as_tibble() %>% 
      mutate(coef = x, 
             sig = FDR < 0.05)
  }, simplify = FALSE)

camera_ire_RUV <- 
  design_RUVk1 %>% colnames() %>% .[2:4] %>% 
  sapply(function(x) {
    geneDGE %>% 
      cpm(log = T) %>% 
      camera(
        index = zebrafishIreGenes,
        design = design_RUVk1, 
      contrast = x, 
      inter.gene.cor = NA, 
      sort = TRUE
    ) %>% 
    rownames_to_column("pathway") %>% 
    as_tibble() %>% 
      mutate(coef = x, 
             sig = FDR < 0.05)
  }, simplify = FALSE)

# Run fgsea
fgseaRes_ire_RUV <- ranks_fgseaRUVk1 %>% 
  lapply(function(x){
    fgseaMultilevel(stats = x, 
          pathways = zebrafishIreGenes) %>% 
      as_tibble() %>%
      dplyr::rename(FDR = padj) %>%
      mutate(padj = p.adjust(pval, "bonferroni")) %>%
      dplyr::select(pathway, pval, FDR, padj, everything()) %>%
      arrange(pval) %>%
      mutate(sig = padj < 0.05)
  })

fgseaRes_ire_RUV$`R122Pfs/+` %<>% 
  mutate(coef = "R122Pfs/+")

fgseaRes_ire_RUV$trans %<>% 
  mutate(coef = "trans")

fgseaRes_ire_RUV$`V1482Afs/+` %<>% 
  mutate(coef = "V1482Afs/+")

harmonic p

harmonic_p_ire_RUV <- 
  fry_ire_RUVk1 %>% 
  bind_rows() %>% 
  dplyr::select(pathway, PValue.Mixed, coef) %>% 
  dplyr::rename(fry_p = PValue.Mixed) %>% 
  left_join(camera_ire_RUV %>% 
              bind_rows() %>% 
              dplyr::select(pathway, PValue, coef), 
            by = c("pathway", "coef")) %>% 
  dplyr::rename(camera_p = PValue) %>% 
  left_join(fgseaRes_ire_RUV %>% 
              bind_rows() %>% 
              dplyr::select(pathway, pval, coef), 
            by = c("pathway", "coef")
            ) %>% 
  dplyr::rename(fgsea_p = pval) %>% 
  bind_rows() %>% 
  nest(p = one_of(c("fry_p", "camera_p", "fgsea_p"))) %>% 
 mutate(harmonic_p = vapply(p, function(x){
        x <- unlist(x)
        x <- x[!is.na(x)]
        p.hmp(x, L = 4)
      }, numeric(1)), 
       wilkinson_p = vapply(p, function(x){
        x <- unlist(x)
        x <- x[!is.na(x)]
        wilkinsonp(x)$p
      }, numeric(1))
    ) %>% 
  unnest() %>% 
  mutate(harmonic_p_FDR = p.adjust(harmonic_p, "fdr"), 
         wilk_p_FDR = p.adjust(wilkinson_p, "fdr"),
         sig = harmonic_p_FDR < 0.05) %>% 
  arrange(harmonic_p_FDR) 

Visualisation

harmonic p values

harmonic_p_ire_RUV %>% 
  mutate(
    pathway = case_when(
      pathway == "ire5_hq" ~ "Canonical 5' IRE", 
      pathway == "ire5_all" ~ "All predicted 5' IRE", 
      pathway == "ire3_hq" ~ "Canonical 3' IRE", 
      pathway == "ire3_all" ~ "All predicted 3' IRE"
    )
  ) %>% 
  ggplot(aes(pathway, -log10(harmonic_p), fill = sig)) +
  geom_col() +
  facet_wrap(~coef) +
  coord_flip() +
  xlab("") +
  scale_fill_manual(values = c("grey50", "turquoise3")) +
  theme_bw() +
  theme(strip.text = element_text(size = 5)) 

  #ggsave("plots/IRE_HARMON_P.png", width = 10, height = 5, units = "cm", dpi = 300)

pheatmap

#png("plots/hq_3IRE_pheatmap_logFC.png", width = 5, height = 25, units = "cm", res = 300)
topTables_RUVk1 %>% 
  bind_rows() %>% 
  dplyr::filter(gene_id %in% zebrafishIreGenes$ire3_hq) %>% 
  dplyr::select(symbol, logFC, coef) %>% 
  spread(key = "coef", value = "logFC") %>% 
  column_to_rownames("symbol") %>% 
  pheatmap(
    border_color = NA,
    scale = "row",
    height = 20,
    main = "Canonical IRE 3'",
    fontsize = 8, 
    width = 3,
    cellwidth = 10, 
    treeheight_col = 0,
    treeheight_row = 0,
    show_rownames = T,
    color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100)
  )

#dev.off()
topTables_RUVk1 %>% 
  bind_rows() %>% 
  dplyr::filter(gene_id %in% zebrafishIreGenes$ire3_all) %>% 
  dplyr::select(symbol, logFC, coef) %>% 
  spread(key = "coef", value = "logFC") %>% 
  column_to_rownames("symbol") %>% 
  pheatmap(
    border_color = NA,
    scale = "row",
    height = 5,
    main = "All predicted IRE 3'",
    fontsize = 8, 
    width = 3,
    cellwidth = 10, 
    treeheight_col = 2,
    treeheight_row = 0,
    show_rownames = FALSE,
    color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100)
  )

~~ GSEA HIF Targets ~~

I next want to further investigate whether changes to gene expression is observed consistent with a hypoxic response. Therefore, I will test the GROSS_HYPOXIA_VIA_HIF1A gene sets from MSigDB. Only the GROSS_HYPOXIA_VIA_HIF1A_DN gene set is altered in the trans samples.

hifTargets <- list(
  GROSS_HYPOXIA_VIA_HIF1A_DN = gmtPathways("gene_sets/GROSS_HYPOXIA_VIA_HIF1A_DN.gmt") %>% 
    as.data.frame() %>% 
    set_colnames("hu_gene_name") %>% 
    left_join(zf2humangeneMapping) %>% 
    na.omit() %>% 
    .$gene_id, 
  GROSS_HYPOXIA_VIA_HIF1A_UP = gmtPathways("gene_sets/GROSS_HYPOXIA_VIA_HIF1A_UP.gmt") %>% 
    as.data.frame() %>% 
    set_colnames("hu_gene_name") %>% 
    left_join(zf2humangeneMapping) %>% 
    na.omit() %>% 
    .$gene_id
)
fry_Res_HIF <- design_RUVk1 %>% colnames() %>% .[2:4] %>% 
  sapply(function(x) {
    geneDGE %>% 
      cpm(log = T) %>% 
      fry(
        index = hifTargets,
        design = design_RUVk1, 
        contrast = x, 
        sort = "directional"
      ) %>% 
      rownames_to_column("pathway") %>% 
      as_tibble() %>% 
      mutate(coef = x, 
             )
  }, simplify = FALSE)
cameraRes_HIF <- design_RUVk1 %>% colnames() %>% .[2:4] %>% 
  sapply(function(x) {
    geneDGE %>% 
      cpm(log = TRUE) %>% 
      camera(
        index = hifTargets,
        design = design_RUVk1, 
        contrast = x, 
        inter.gene.cor = NA, 
        sort = TRUE
      ) %>% 
      rownames_to_column("pathway") %>% 
      as_tibble() %>% 
      mutate(coef = x)
  }, simplify = FALSE)
fgseaRes_HIF <- ranks_fgseaRUVk1 %>% 
  lapply(function(x){
    fgseaMultilevel(stats = x, 
          pathways = hifTargets) %>% 
      as_tibble() %>%
      dplyr::rename(FDR = padj) %>%
      mutate(padj = p.adjust(pval, "bonferroni")) %>%
      dplyr::select(pathway, pval, FDR, padj, everything()) %>%
      arrange(pval) %>%
      mutate(sig = padj < 0.05)
  })

fgseaRes_HIF$`R122Pfs/+` %<>% 
  mutate(coef = "R122Pfs/+")

fgseaRes_HIF$trans %<>% 
  mutate(coef = "trans")

fgseaRes_HIF$`V1482Afs/+` %<>% 
  mutate(coef = "V1482Afs/+")
harmonic_p_HIF <- fry_Res_HIF%>% 
  bind_rows() %>% 
  dplyr::select(pathway, PValue.Mixed, coef) %>% 
  dplyr::rename(fry_p = PValue.Mixed) %>% 
  left_join(cameraRes_HIF %>% 
              bind_rows() %>% 
              dplyr::select(pathway, PValue, coef), 
            by = c("pathway", "coef")) %>% 
  dplyr::rename(camera_p = PValue) %>% 
  left_join(fgseaRes_HIF %>% 
              bind_rows() %>% 
              dplyr::select(pathway, pval, coef), 
            by = c("pathway", "coef")) %>% 
  dplyr::rename(fgsea_p = pval) %>% 
  bind_rows() %>% 
  nest(p = one_of(c("fry_p", "camera_p", "fgsea_p"))) %>% 
  mutate(
    harmonic_p = vapply(p, function(x){
      x <- unlist(x)
      x <- x[!is.na(x)]
      p.hmp(x, L = 3)
    }, numeric(1))
  ) %>% 
  unnest() %>% 
  mutate(harmonic_p_FDR = p.adjust(harmonic_p, "fdr"), 
         sig = harmonic_p_FDR < 0.05) %>% 
  arrange(harmonic_p_FDR)
# png("plots/heatmaps/GROSS_HYPOXIA_DN.png", width = 5, height = 7, units = "cm", res = 300)
topTables_RUVk1 %>% 
  bind_rows() %>% 
  mutate(coef = dplyr::recode(coef, `R122Pfs/+` = "null/+")) %>% 
  dplyr::filter(gene_id %in% hifTargets$GROSS_HYPOXIA_VIA_HIF1A_DN) %>% 
  dplyr::select(symbol, logFC, coef) %>% 
  spread(key = "coef", value = "logFC") %>% 
  column_to_rownames("symbol") %>% 
  pheatmap(
    border_color = NA,
    scale = "row",
    height = 5,
    main = "GROSS_HYPOXIA_VIA_HIF1A_DN",
    fontsize = 8, 
    width = 3,
    cellwidth = 10, 
    treeheight_col = 2,
    treeheight_row = 0,
    show_rownames = FALSE,
    cluster_cols = FALSE, 
    color = colorRampPalette(rev(brewer.pal(n = 7, name =
"RdBu")))(100)
  )

#dev.off()

session info

sessionInfo() %>% 
 pander

R version 4.0.2 (2020-06-22)

Platform: x86_64-apple-darwin17.0 (64-bit)

locale: en_AU.UTF-8||en_AU.UTF-8||en_AU.UTF-8||C||en_AU.UTF-8||en_AU.UTF-8

attached base packages: grid, stats4, parallel, stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: ensembldb(v.2.12.1), AnnotationFilter(v.1.12.0), UpSetR(v.1.4.0), pander(v.0.6.3), VennDiagram(v.1.6.20), futile.logger(v.1.4.3), pathview(v.1.28.1), ggfortify(v.0.4.10), ggeasy(v.0.1.2), ggrepel(v.0.8.2), pheatmap(v.1.0.12), kableExtra(v.1.2.1), knitr(v.1.29), scales(v.1.1.1), gridExtra(v.2.3), RColorBrewer(v.1.1-2), ggpubr(v.0.4.0), harmonicmeanp(v.3.0), FMStable(v.0.1-2), metap(v.1.4), ngsReports(v.1.4.2), RUVSeq(v.1.22.0), EDASeq(v.2.22.0), ShortRead(v.1.46.0), GenomicAlignments(v.1.24.0), SummarizedExperiment(v.1.18.2), DelayedArray(v.0.14.1), matrixStats(v.0.56.0), Rsamtools(v.2.4.0), Biostrings(v.2.56.0), XVector(v.0.28.0), BiocParallel(v.1.22.0), fgsea(v.1.14.0), rtracklayer(v.1.48.0), GenomicFeatures(v.1.40.1), AnnotationDbi(v.1.50.3), Biobase(v.2.48.0), GenomicRanges(v.1.40.0), GenomeInfoDb(v.1.24.2), IRanges(v.2.22.2), S4Vectors(v.0.26.1), AnnotationHub(v.2.20.2), BiocFileCache(v.1.12.1), dbplyr(v.1.4.4), BiocGenerics(v.0.34.0), biomaRt(v.2.44.1), edgeR(v.3.30.3), limma(v.3.44.3), readxl(v.1.3.1), reshape(v.0.8.8), forcats(v.0.5.0), stringr(v.1.4.0), dplyr(v.1.0.2), purrr(v.0.3.4), readr(v.1.3.1), tidyr(v.1.1.2), tibble(v.3.0.3), ggplot2(v.3.3.2), tidyverse(v.1.3.0) and magrittr(v.1.5)

loaded via a namespace (and not attached): rappdirs(v.0.3.1), R.methodsS3(v.1.8.1), bit64(v.4.0.5), aroma.light(v.3.18.0), multcomp(v.1.4-13), R.utils(v.2.10.1), data.table(v.1.13.0), hwriter(v.1.3.2), KEGGREST(v.1.28.0), RCurl(v.1.98-1.2), generics(v.0.0.2), lambda.r(v.1.2.4), TH.data(v.1.0-10), RSQLite(v.2.2.0), bit(v.4.0.4), mutoss(v.0.1-12), webshot(v.0.5.2), xml2(v.1.3.2), lubridate(v.1.7.9), httpuv(v.1.5.4), assertthat(v.0.2.1), xfun(v.0.16), hms(v.0.5.3), evaluate(v.0.14), promises(v.1.1.1), fansi(v.0.4.1), progress(v.1.2.2), Rgraphviz(v.2.32.0), DBI(v.1.1.0), geneplotter(v.1.66.0), tmvnsim(v.1.0-2), htmlwidgets(v.1.5.1), ellipsis(v.0.3.1), backports(v.1.1.9), FactoMineR(v.2.3), annotate(v.1.66.0), gbRd(v.0.4-11), vctrs(v.0.3.4), here(v.0.1), abind(v.1.4-5), withr(v.2.2.0), prettyunits(v.1.1.1), mnormt(v.2.0.1), cluster(v.2.1.0), lazyeval(v.0.2.2), crayon(v.1.3.4), genefilter(v.1.70.0), labeling(v.0.3), pkgconfig(v.2.0.3), nlme(v.3.1-149), ProtGenerics(v.1.20.0), rlang(v.0.4.7), lifecycle(v.0.2.0), sandwich(v.2.5-1), mathjaxr(v.1.0-1), modelr(v.0.1.8), cellranger(v.1.1.0), rprojroot(v.1.3-2), graph(v.1.66.0), Matrix(v.1.2-18), carData(v.3.0-4), Rhdf5lib(v.1.10.1), zoo(v.1.8-8), reprex(v.0.3.0), png(v.0.1-7), viridisLite(v.0.3.0), bitops(v.1.0-6), R.oo(v.1.24.0), blob(v.1.2.1), jpeg(v.0.1-8.1), rstatix(v.0.6.0), ggsignif(v.0.6.0), leaps(v.3.1), memoise(v.1.1.0), plyr(v.1.8.6), bibtex(v.0.4.2.2), zlibbioc(v.1.34.0), compiler(v.4.0.2), plotrix(v.3.7-8), KEGGgraph(v.1.48.0), cli(v.2.0.2), formatR(v.1.7), mgcv(v.1.8-33), MASS(v.7.3-52), tidyselect(v.1.1.0), stringi(v.1.4.6), highr(v.0.8), yaml(v.2.2.1), askpass(v.1.1), locfit(v.1.5-9.4), latticeExtra(v.0.6-29), fastmatch(v.1.1-0), tools(v.4.0.2), rio(v.0.5.16), rstudioapi(v.0.11), foreign(v.0.8-80), farver(v.2.0.3), scatterplot3d(v.0.3-41), digest(v.0.6.25), BiocManager(v.1.30.10), shiny(v.1.5.0), Rcpp(v.1.0.5), car(v.3.0-9), broom(v.0.7.0), BiocVersion(v.3.11.1), later(v.1.1.0.1), org.Hs.eg.db(v.3.11.4), httr(v.1.4.2), ggdendro(v.0.1.21), Rdpack(v.1.0.0), colorspace(v.1.4-1), rvest(v.0.3.6), XML(v.3.99-0.5), fs(v.1.5.0), truncnorm(v.1.0-8), splines(v.4.0.2), statmod(v.1.4.34), sn(v.1.6-2), multtest(v.2.44.0), plotly(v.4.9.2.1), xtable(v.1.8-4), jsonlite(v.1.7.0), futile.options(v.1.0.1), flashClust(v.1.01-2), R6(v.2.4.1), TFisher(v.0.2.0), pillar(v.1.4.6), htmltools(v.0.5.0), mime(v.0.9), glue(v.1.4.2), fastmap(v.1.0.1), DT(v.0.15), DESeq(v.1.39.0), interactiveDisplayBase(v.1.26.3), codetools(v.0.2-16), mvtnorm(v.1.1-1), lattice(v.0.20-41), numDeriv(v.2016.8-1.1), curl(v.4.3), zip(v.2.1.1), openxlsx(v.4.1.5), openssl(v.1.4.2), survival(v.3.2-3), rmarkdown(v.2.3), munsell(v.0.5.0), rhdf5(v.2.32.2), GenomeInfoDbData(v.1.2.3), haven(v.2.3.1), reshape2(v.1.4.4) and gtable(v.0.3.0)